---
title: "Identifying Price Informativeness"
author: "Eduardo Davila^[Yale] & Cecilia Parlatore^[NYU Stern]"
date: "`r Sys.Date()`"
output:
  html_document: default
  pdf_document: default
---

```{r}

library(here); library(tidyverse); library(data.table); library(lubridate); library(zoo)

# Load the saved inputs into the current environment. Paths are relative to
# the working directory. The objects used below (df_detail_ueps,
# df_actual_ueps, df_excl_ueps, ibes_crsp_adjust and q_lag) are not created in
# this chunk, so they are presumably supplied by these files.
# NOTE(review): confirm which file provides which object.
load("input/ibes_detail.RData")
load("input/ibes_actual.RData")
load("input/ibes_excl.RData")
load("input/ibes_crsp_adjust.RData")
# Presumably the source of q_lag, which selects the forecast horizon further
# down -- TODO confirm.
load("intermediate/parameters.RData")

# In the actuals table, anndats becomes actual_date. Every input table is then
# reduced to rows with no missing values and with exact duplicates removed.
df_actual_ueps <- df_actual_ueps %>%
  rename(actual_date = anndats) %>%
  drop_na() %>%
  distinct()

df_detail_ueps <- distinct(drop_na(df_detail_ueps))
df_excl_ueps <- distinct(drop_na(df_excl_ueps))
ibes_crsp_adjust <- distinct(drop_na(ibes_crsp_adjust))

# Expand the adjustment table to one row per permno and calendar day, so that
# it can later be joined on arbitrary dates.
#
# complete() spans the global min(date)..max(date) for every permno, so a
# permno whose first observation comes after the global start gets leading
# rows with missing values. The forward fill must therefore run within each
# permno: an ungrouped fill() would copy the previous permno's last ticker and
# cfacshr into those leading rows, and drop_na() would no longer remove them.
ibes_crsp_adjust <- ibes_crsp_adjust %>%
  ungroup() %>%
  complete(nesting(permno), date = seq(min(date), max(date), by = "day")) %>%
  # Make the chronological order within each permno explicit before filling.
  arrange(permno, date) %>%
  group_by(permno) %>%
  # Carry the last observed values forward, never across permno boundaries.
  fill(everything()) %>%
  ungroup() %>%
  # Days before a permno's first observation stay missing and are dropped.
  drop_na()

# Attach the exclusion records to the detail forecasts and discard every
# forecast whose exclusion flag is "X". Forecasts with no matching exclusion
# record have a missing flag and are kept. The helper columns are then dropped.
df_detail_ueps <- df_detail_ueps %>%
  left_join(
    df_excl_ueps,
    by = join_by(
      actdats, ticker, fpi, cusip, estimator, analys, measure, oftic, fpedats
    ),
    multiple = "all"
  ) %>%
  filter(is.na(excfla) | excfla != "X") %>%
  select(-c(excfla, excdats))

# Shift each actual forward by one year (period end and announcement date) so
# that a forecast for period end fpedats is paired with the actual from one
# year earlier. l4_value_actual is a copy of that lagged actual value.
df_actual_ueps <- mutate(
  df_actual_ueps,
  l4_pends = pends %m+% years(1),
  l4_actual_date = actual_date %m+% years(1),
  l4_value_actual = value
)

# Keep only forecasts that have a lagged actual. Columns of the actuals table
# whose names clash with forecast columns get the "_actual" suffix (so the
# actual's value becomes value_actual). When several actuals match, only the
# first is used.
df_ueps <- inner_join(
  df_detail_ueps,
  df_actual_ueps,
  by = join_by(fpedats == l4_pends, oftic, measure),
  suffix = c("", "_actual"),
  multiple = "first"
)

# Look up the adjustment table twice: once on the forecast announcement date
# (anndats) and once on the shifted actual announcement date (l4_actual_date).
# The forecast value is then rescaled by the ratio of the two cfacshr factors,
# and the factor columns are removed.
df_ueps <- df_ueps %>%
  left_join(
    ibes_crsp_adjust,
    by = join_by(anndats == date, ticker),
    multiple = "any"
  ) %>%
  left_join(
    ibes_crsp_adjust,
    by = join_by(l4_actual_date == date, ticker),
    suffix = c("", "_actual"),
    multiple = "any"
  ) %>%
  # The permno from the first lookup is the one kept.
  select(-permno_actual) %>%
  mutate(value = value * cfacshr_actual / cfacshr) %>%
  select(-c(cfacshr, cfacshr_actual))

# Working table: complete rows only, with the fpi code turned into a horizon
# measure q_ahead = fpi - 5. The fpi column itself is dropped.
# NOTE(review): presumably fpi codes 6-9 denote 1-4 quarters ahead -- confirm.
df <- df_ueps %>%
  drop_na() %>%
  mutate(q_ahead = as.integer(fpi) - 5, .keep = "unused")

# Horizon of the forecasts to keep: 1 when q_lag is 1, otherwise 4.
eps_horizon <- if (q_lag == 1) 1 else 4

# EPS forecasts at the chosen horizon. The filter columns are dropped once
# they have served their purpose.
df1 <- df %>%
  filter(measure == "EPS", q_ahead == eps_horizon) %>%
  select(-c(measure, q_ahead))

# df2 is built from exactly the same selection as df1.
df2 <- df1

# Per (fpedats, permno): number of forecasts, mean forecast and mean lagged
# actual. These are turned into the forecast's difference from, and growth
# relative to, the actual. Rows are keyed by the month and year of fpedats.
df_summary1 <- df1 %>%
  group_by(fpedats, permno) %>%
  summarize(
    count = n(),
    forecast = mean(value, na.rm = TRUE),
    value_actual = mean(value_actual, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(
    month = month(fpedats),
    year = year(fpedats),
    forecast_growth = (forecast - value_actual) / value_actual,
    forecast_diff = forecast - value_actual
  ) %>%
  select(permno, count, month, year, forecast_growth, forecast_diff)
  
# Same aggregation as for df_summary1, but the month/year key is taken from
# fpedats shifted back by 91.5 * q_lag days, so each row is keyed to an
# earlier period. The original period end is kept as future_fpedats.
df_summary2 <- df2 %>%
  group_by(fpedats, permno) %>%
  summarize(
    future_forecast = mean(value, na.rm = TRUE),
    value_actual = mean(value_actual, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(
    month = month(fpedats - 91.5 * q_lag),
    year = year(fpedats - 91.5 * q_lag),
    future_forecast_growth = (future_forecast - value_actual) / value_actual,
    future_forecast_diff = future_forecast - value_actual
  ) %>%
  select(
    permno, month, year,
    future_fpedats = fpedats,
    future_forecast_growth, future_forecast_diff
  )
  
# Pair each (permno, month, year) summary with its shifted counterpart. Only
# rows present in both tables and free of missing values survive.
ibes_summary <- df_summary1 %>%
  inner_join(df_summary2, by = c("permno", "month", "year")) %>%
  drop_na()

# Number of forecasts behind each retained row, renamed to analyst_count.
analyst_coverage <- select(
  ibes_summary,
  permno, analyst_count = count, month, year
)

# Final panel: identifiers plus the four forecast measures, sorted by firm
# and period.
ibes_summary <- ibes_summary %>%
  select(
    permno, year, month,
    forecast_diff, forecast_growth,
    future_forecast_diff, future_forecast_growth
  ) %>%
  arrange(permno, year, month)

save(ibes_summary, analyst_coverage, file = "intermediate/ibes.RData")

```